Accusoft.PdfXpress7.ActiveX
Search Text in a PDF Document

PDF Xpress™ lets an application search the text of a PDF document by using the TextFinder object.

VB Example
Copy Code
'This code demonstrates using a TextFinder to extract searchable text from a PDF document,
'locate exact phrase matches, then report the matched text, the words around the match, and the
'coordinates of the text on the PDF page.
'PDF_FILENAME, PDF_PASSWORD and SEARCH_PHRASE are not defined in this sample; the host
'application is expected to supply them.
On Error GoTo error
'' Initialize PdfXpress instance
Dim pdfxpress1 As New PdfXpress
pdfxpress1.Initialize
'' Open existing PDF document to search
Dim document As New PdfDocument
document.SetParentControl pdfxpress1
document.OpenDocument PDF_FILENAME, PDF_PASSWORD
'' Prepare to extract text from each page of the document
Dim oTextFinderOptions As New TextFinderOptions
oTextFinderOptions.IgnoreTaggedPdfArtifacts = False
oTextFinderOptions.IgnoreTextAnnotations = True
'' For each page in the PDF document (page numbers are zero-based here)
Dim lngPageNumber As Long
For lngPageNumber = 0 To document.DocumentPageCount - 1
    '' Get all searchable text from the current PDF page.
    Dim oTextFinder As TextFinder
    Set oTextFinder = document.GetTextFinder(lngPageNumber, oTextFinderOptions)
    Dim strSearchDomain As String
    strSearchDomain = oTextFinder.GetText

    '' Locate every exact occurrence of the search phrase
    '' in the search domain.
    '' The loop variable is reassigned by InStr below; "Next" then advances it
    '' by one, so the following search starts one character past the start of
    '' the previous match (overlapping matches are therefore reported too).
    Dim lngSearchCursor As Long
    For lngSearchCursor = 1 To Len(strSearchDomain)

        '' Locate the next match offset:
        '' - If a match is not made, lngSearchCursor will equal zero.
        '' - If a match is made, lngSearchCursor is the begin text offset.
        lngSearchCursor = InStr(lngSearchCursor, strSearchDomain, SEARCH_PHRASE)

        '' If a match is not found, then search the next PDF page.
        If (lngSearchCursor = 0) Then Exit For

        '' Prepare to recover a single word before the matched phrase
        '' and four words after matched phrase.
        '' NOTE(review): InStr returns a 1-based position. Confirm against the
        '' TextMatchOptions documentation whether BeginTextOffset/EndTextOffset
        '' expect 0-based offsets; if so, subtract 1 from both values.
        Dim oTextMatchOptions As New TextMatchOptions
        oTextMatchOptions.NumberOfWordsBeforeMatchToReport = 1
        oTextMatchOptions.NumberOfWordsAfterMatchToReport = 4
        oTextMatchOptions.BeginTextOffset = lngSearchCursor
        oTextMatchOptions.EndTextOffset = lngSearchCursor + Len(SEARCH_PHRASE)

        '' Report the text match (page number is shown one-based for the reader)
        Dim oTextMatch As TextMatch
        Set oTextMatch = oTextFinder.GetTextMatch(oTextMatchOptions)
        Debug.Print "[Page " & CLng(lngPageNumber + 1) & "] Found """ _
            & oTextMatch.WordsBefore _
            & oTextMatch.MatchedText _
            & oTextMatch.WordsAfter _
            & """"

        '' Report the location of the match, expressed in PDF user space coordinates.
        '' Every corner is printed as ( X, Y ).
        Dim lngQuad As Long
        lngQuad = 0
        Dim oQuadrilateral As Quadrilateral
        For Each oQuadrilateral In oTextMatch.BoundingQuadrilaterals
            Debug.Print "   Quad[ " & lngQuad & " ] "
            Debug.Print "       bottomLeft  = ( " & oQuadrilateral.BottomLeftX  & ", " & oQuadrilateral.BottomLeftY & " )"
            Debug.Print "       bottomRight = ( " & oQuadrilateral.BottomRightX & ", " & oQuadrilateral.BottomRightY & " )"
            Debug.Print "       topLeft     = ( " & oQuadrilateral.TopLeftX     & ", " & oQuadrilateral.TopLeftY & " )"
            Debug.Print "       topRight    = ( " & oQuadrilateral.TopRightX    & ", " & oQuadrilateral.TopRightY & " )"
            lngQuad = lngQuad + 1
        Next

    Next lngSearchCursor
Next lngPageNumber
           
.
.
.
'' Normal completion: skip the error handler and go straight to cleanup.
GoTo finish
error:
'' Error handler: report the failure, then abandon the remaining work and
'' fall into cleanup. Resuming at the statement after the failure would keep
'' operating on objects that may never have been initialized or opened.
If Err.Number <> 0 Then
    MsgBox Err.Description
End If
Resume finish
finish:
'' Cleanup runs on both the success and the failure path. Errors raised while
'' releasing resources are ignored so they cannot re-enter the handler above
'' and loop forever.
On Error Resume Next
Set document = Nothing
pdfxpress1.Terminate
Set pdfxpress1 = Nothing

 

See Also

 

 


©2017. Accusoft Corporation. All Rights Reserved.

Send Feedback